/******************************************************************************
Input(s): Manufacturing_1977_2009.dta
	(plus the sample-restriction and production-function parameter files
	merged in below)

This .do file has 1 section.
SECTION 1:
	- Imports the trimmed sample restriction
	- Constructs measures of productivity and distortions
	- Constructs and exports industry counterfactuals
	- Constructs aggregate counterfactual
	
******************************************************************************/

/*Start from a clean session; disable the pager and raise the matrix-size limit*/
clear programs
clear all
set more off
set matsize 11000

/*Directories. `data' is prepended directly to file names, so it must end
	with a slash.*/
local data "/FILE PATH GOES HERE/"
local boots "/FILE PATH GOES HERE/"

/*Sample types handled throughout the file*/
local dtypes "ASM CMF"

/*Run options are handed over by the calling script as globals of the same
	name; copy each one into a local*/
foreach opt in alphaL alphaK sigma fname pftype dtrim {
	local `opt' = "${`opt'}"
}

/*Switch for SECTION 1 (constructing productivity and distortion measures):
	set to 1 to run it*/
local S1 1

/*********************************************************
SECTION 1: 
	- Imports trimmed sample restriction
	- Constructs measures of productivity and distortion
	- Constructs industry and aggregate counterfactuals
		
**********************************************************/
if `S1' == 1 {
/*Decompress the archived establishment panel and load it. The file is left
	uncompressed afterwards: the gzip call below is disabled.*/
shell gunzip /FILE PATH GOES HERE/Manufacturing_1977_2009.dta.gz
use "`data'Manufacturing_1977_2009.dta", clear
*!gzip /FILE PATH GOES HERE/Manufacturing_1977_2009.dta

/*Retain only the variables referenced below and end the panel in 2007
	(observations with a missing year are removed as well)*/
keep et fk_naics02 year lbdnum PY2 Y2_* L K sw R wt
keep if year<=2007
/*Merge in sample restriction
	`dtrim' selects where the sample indicators used below (sample_ASM,
	sample_CMF) come from:
	  fixed : the 5y one-ACF sample file, whatever `pftype' is
	  own   : the sample file that matches `pftype' (5y, 5y_2, 10y, base)
	  none  : no trimming; the indicators are built here from et and the
	          census years
	Any other combination stops the run immediately, rather than failing
	further down with "variable sample_ASM not found".
	NOTE(review): the sample files are presumed to supply sample_ASM and
	sample_CMF - confirm against the files.
*/
local samplefile ""
if "`dtrim'" == "fixed" {
	local samplefile "Misallocation_Sample_vrts_1acf_sest_5y"
}
else if "`dtrim'" == "own" {
	if "`pftype'" == "5y" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_5y"
	}
	else if "`pftype'" == "5y_2" {
		local samplefile "Misallocation_Sample_vrts_2acf_sest_5y"
	}
	else if "`pftype'" == "10y" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_10y"
	}
	else if "`pftype'" == "base" {
		local samplefile "Misallocation_Sample_vrts_1acf_sest_base"
	}
}

if "`samplefile'" != "" {
	/*Using-only rows are kept, exactly as before (no keep() option)*/
	merge 1:1 lbdnum year fk_naics02 using "`data'`samplefile'"
	drop _m
}
else if "`dtrim'" == "none" {
	gen sample_ASM = 1 if et==0
	gen sample_CMF = 1 if inlist(year, 1977, 1982, 1987, 1992, 1997, 2002, 2007)
}
else {
	display as error "no sample restriction defined for dtrim=`dtrim', pftype=`pftype'"
	exit 198
}

/*Five-year window index: 1 = up to 1982, 2 = 1983-1987, ..., 6 = 2003-2007.
	Years after 2007 and missing years are left missing.*/
gen period = cond(year<=1982, 1, ///
	cond(year<=1987, 2, ///
	cond(year<=1992, 3, ///
	cond(year<=1997, 4, ///
	cond(year<=2002, 5, ///
	cond(year<=2007, 6, .))))))

/*Ten-year window index: 1 = up to 1987, 2 = 1988-1997, 3 = 1998-2007*/
gen dec = cond(year<=1987, 1, ///
	cond(year<=1997, 2, ///
	cond(year<=2007, 3, .)))

/*Merge in production-function parameters
	One row per industry (fk) and five-year period in the estimates file
	supplies sigma, aL and aK. Rows that exist only in the estimates file are
	discarded; establishments without estimates stay in the data with missing
	parameters.*/
local boots "/FILE PATH GOES HERE/Bootstrap/"
merge m:1 fk period using "`boots'/estimation_pairs_LP_ky_AB.dta", keepusing(fk period sigma aL aK)
drop if _m==2
drop _m


gen alphaL = aL
gen alphaK = aK
*gen sigma = `sigma'

/*Option: override the merged sigma with a common value of 3*/
if "`sigma'" == "sigma_3" {
	replace sigma = 3
	}
	
/*Option: rescale the elasticities so that they sum to one.
	The sum is computed once, BEFORE either elasticity is overwritten. Dividing
	alphaK by (alphaL+alphaK) after alphaL had already been rescaled used the
	wrong denominator and left alphaL+alphaK different from 1.*/
if "`fname'" == "crts_s3_AB" {
	tempvar rts
	gen double `rts' = alphaL + alphaK
	replace alphaL = alphaL/`rts'
	replace alphaK = alphaK/`rts'
	drop `rts'
}

/*Y2 is PY2 raised to the power sigma/(sigma-1), using the sigma merged in
	above (the pre-computed Y2_sigma* columns are not used):*/
*gen Y2 = Y2_sigma`sigmaY'
gen double Y2 = PY2^(sigma/(sigma-1))

/*beta is the sum of the two elasticities*/
gen beta = alphaL + alphaK

/*The window indices were only needed for the parameter merge*/
*drop *acf_* *_hk_* sigma_* period dec
drop period dec

/*Dealing with potentially censored observations: remove every observation
	that lacks either elasticity*/
drop if alphaL==. | alphaK==.

/*Disabled: dropping observations with industries where imposing sigma leads
	the markup to exceed RTS*/
*drop if alphaL+alphaK>sigma/(sigma-1)

/*Calculate physical productivity, TFPQ = Y2 / (L^alphaL * K^alphaK).
	One copy per sample type, filled in only for observations that belong to
	that sample.*/
foreach d in ASM CMF {
	gen double tfpq_`d' = Y2/((L^alphaL)*(K^alphaK)) if sample_`d'==1
}
	
/*Calculate relative establishment size in the absence of distortions and the
	presence of variable markups
	(Doable because it requires the productivity levels only)

	For each sample type (ASM, CMF) the shares s are found by a damped
	fixed-point iteration, run separately within every industry-year:
		x     = [ (1-s_old)^beta * tfpq / mean((1-s_old)^beta * tfpq) ]
		          ^ ((sigma-1)/(beta+sigma*(1-beta)))
		s_new = x / sum(wt*x)
	An industry-year counts as converged once the wt-weighted sum of squared
	changes in s is below the threshold. Its shares are then written to a
	tempfile and the industry-year is removed from the working data.
	Everything happens between preserve and restore, so the main data are not
	modified here; the result for each sample type is left in the tempfile
	fshares_ASM / fshares_CMF (variable s_final_ASM / s_final_CMF).
*/
local dtypes "ASM CMF"
sort fk_naics02 year
foreach d of local dtypes {
	preserve
	
	/*Upper bound on the number of iterations*/
	local maxiter = 10000

	keep if sample_`d'==1
	
	/*Only observations with beta/(sigma/(sigma-1)) below 0.99 are iterated on*/
	gen temp = beta/(sigma/(sigma-1))
	keep if temp<.99
	drop temp

	/*Rank of TFPQ within industry-year; -field- gives rank 1 to the highest value*/
	by fk year: egen tfpq_rank_`d' = rank(tfpq_`d'), field

	/*Initial value = 1/N, where N is the sum of wt within the industry-year*/
	by fk year: egen count = sum(sample_`d'*wt)
	gen s_0 = 1/count if sample_`d'==1

	/*Step size (damping weight on the new iterate): 1-beta/(sigma/(sigma-1)),
		capped at 0.5*/
	gen stepsize = 1-beta/(sigma/(sigma-1))
		replace stepsize = 0.5 if stepsize>0.5

	/*Storing final results: the share, the iteration at which it was stored,
		and the convergence criterion at that iteration*/
	gen s_final = .
	gen s_iter_final = .
	gen s_sum_final = .
	local threshold = 1e-6

	/*Initialize Guess*/
	gen s_old = s_0


	/*Do each iteration from 1 to maxiter (set above)*/
	forvalues iter = 1/`maxiter' {
		
		/*Create new share values using formula.
			The input is divided by its industry-year mean before being raised
			to the power; the normalisation by temp3 below turns the result
			into shares whose wt-weighted sum is 1.*/
		gen temp = ((1-s_old)^beta)*tfpq_`d' if sample_`d'==1
		by fk year: egen tempmean = mean(temp)
		gen tempnorm = temp/tempmean
		
		gen double temp2 = (tempnorm)^((sigma-1)/(beta+sigma*(1-beta))) if sample_`d'==1
		by fk year: egen double temp3 = sum(temp2*wt)
		gen double s_new = temp2/temp3
		
		/*Checking Convergence: wt-weighted sum of squared changes in the share,
			by industry-year.
			NOTE(review): an industry-year whose ssum is exactly 0 is never
			treated as converged. Presumably this guards against groups where
			s_new is missing (egen sum() then returns 0) - confirm.*/
		by fk year: egen double ssum = sum((s_new-s_old)^2*wt)
		
		replace s_final = s_new if ssum<`threshold' & ssum!=0
		replace s_iter_final = `iter' if ssum<`threshold' & ssum!=0
		replace s_sum_final = ssum if ssum<`threshold' & ssum!=0
		
		drop temp tempmean tempnorm temp2 temp3
		
		/*Save Output:
		- Check if the "if statement" is empty
			(Criterion: sum of squared deviations is less than threshold)
			-- if empty, proceed to next iteration
			-- if not empty, save tempfile with the datapoints
		*/
		quietly sum year if ssum<`threshold' & ssum!=0
		local nobs = r(N)
		
		if `nobs' != 0 {
		/*Park the full working data, write the converged industry-years to
			their own tempfile, then reload the working data*/
		tempfile current
			save "`current'"
			
		keep if ssum<`threshold' & ssum!=0
		keep fk year lbdnum tfpq_rank_`d' s_final s_sum_final s_iter_final
		
		tempfile shares_`d'_`iter'
			save "`shares_`d'_`iter''"
		
		use "`current'", clear
		
		}
			
		/*Converged industry-years leave the working data*/
		drop if ssum<`threshold' & ssum!=0
		
		/*If we reach the last iteration without having converged for some 
			industries, save the values from the last iteration*/
		if `iter' == `maxiter' {
		replace s_final = s_new
		replace s_iter_final = `iter'
		replace s_sum_final = ssum
		}
		
		
		/*Damped update: the next guess is a weighted average of the old and the
			new shares, with weight stepsize on the new ones*/
		replace s_new = (1-stepsize)*s_old+stepsize*s_new
		drop s_old ssum
		rename s_new s_old
		
		disp("DONE WITH ITERATION `iter'")
		
		quietly sum year
		local nobs = r(N)
		
		/*If all industry-years converge before the last iteration, exit loop*/
		if `nobs' == 0 {
		continue, break
		}
		
		}
		
	/*Whatever is still in memory did not converge (it is empty if the loop
		exited early); keep it so that it is appended with the rest*/
	sort fk year tfpq_rank_`d'
	keep fk year lbdnum tfpq_rank_`d' s_final s_sum_final s_iter_final

	tempfile rest
		save "`rest'"

	/*Stack the per-iteration tempfiles. A tempfile macro exists only for
		iterations in which something converged; for all other iterations the
		append is expected to fail, and -capture- suppresses the error.*/
	clear
	forvalues iter = 1/`maxiter' {
	capture append using "`shares_`d'_`iter''"
		}

	capture append using "`rest'"
		
	/*Final product for this sample type: one share per establishment-year*/
	keep year fk lbdnum s_final
	sort fk year lbdnum
		rename s_final s_final_`d'
	tempfile fshares_`d'
	save "`fshares_`d''"

	restore
	}

/*Attach the shares produced by the iteration above (s_final_ASM, s_final_CMF)
	to the establishment-level data, one sample type at a time*/
sort fk year lbdnum
foreach d in ASM CMF {
	merge 1:1 fk year lbdnum using "`fshares_`d''", nogenerate
}


/*Calculate relative distortions
	For each input X in {K, L}:
		tauX = [ sum over the industry-year of py_weight * (X/PY2) ] / (X/PY2)
	where py_weight is the establishment's share of industry-year PY2.
	ASM: PY2 is scaled by the sampling weight wt when forming py_weight.
	CMF: PY2 enters unweighted.
*/
local dtypes "ASM CMF"
sort fk_naics02 year
foreach d of local dtypes {
	/*Numerator of the PY2 share for this sample type*/
	local rev "PY2"
	if "`d'"=="ASM" local rev "PY2*wt"
	
	gen temp = `rev' if sample_`d'==1
	by fk_naics02 year: egen py_weight_total = sum(temp) if sample_`d'==1
	gen py_weight = (temp)/py_weight_total
	drop temp py_weight_total
	
	/*Same construction for capital (tauk_*) and labour (taul_*)*/
	foreach v in k l {
		local X = upper("`v'")
		
		gen temp = (`X'/(PY2)) if sample_`d'==1
		gen temp2 = temp*py_weight
		by fk_naics02 year: egen tau`v'_bar = sum(temp2)
		gen tau`v'_`d' = tau`v'_bar/temp
		drop tau`v'_bar temp temp2
		}
	
	drop py_weight
	}
	
/*Calculate TFPR Ratio and Counterfactual TFPR Ratio 
TFPR_bar/TFPR = (PY_i/PY_ie)^(1-beta) * (MRPK_bar/MRPK)^alphaK * (MRPL_bar/MRPL)^alphaL

	Note on sampling weights: whenever a weighted average is constructed, the
	weight is (wt*py)/(sum of wt*py), to account for sampling. For the TFPR
	ratio we need the size of the firm relative to the industry: the industry
	total has to be adjusted for sampling, but the weight must not affect the
	firm's own size. Hence
		size term = (sum(wt*py)/py)^(1-beta)
	and when the A*TFPR terms are summed later, wt*(A*TFPR)^(sigma-1) is used
	so that the industry is still representative.
	For CMF no sampling weight is applied in these sums.
*/
local dtypes "ASM CMF"
sort fk_naics02 year
foreach d of local dtypes {
	/*Multiplier applied inside the industry sums: wt for ASM, nothing for CMF*/
	local w ""
	if "`d'"=="ASM" local w "*wt"
	
	tempvar rev revsum size_term mrpk_term mrpl_term spread spreadsum lvl mkp
	
	/*Part 1: PY Ratio*/
	gen `rev' = PY2`w' if sample_`d'==1
	by fk_naics02 year: egen `revsum' = sum(`rev') if sample_`d'==1
	gen double `size_term' = (`revsum'/PY2)^(1-alphaL-alphaK) if sample_`d'==1
	
	/*Part 2: MRPK Ratio*/
	gen double `mrpk_term' = (tauk_`d')^(-alphaK)
	
	/*Part 3: MRPL Ratio*/
	gen double `mrpl_term' = (taul_`d')^(-alphaL)
	
	gen double tfpr_ratio_`d' = `size_term'*`mrpk_term'*`mrpl_term'
	
	drop `size_term' `mrpk_term' `mrpl_term' `rev' `revsum'
	
	/*Counterfactual TFPR under VARIABLE MARKUPS, built from the shares
		s_final_* only:
		(1/s)^(1-alphaK-alphaL) * ((1-s)/sum(s*(1-s)))^(alphaK+alphaL)*/
	gen double `spread' = s_final_`d'*(1-s_final_`d')
	by fk_naics02 year: egen double `spreadsum' = sum(`spread'`w')
	gen double `lvl' = (1/s_final_`d')^(1-alphaK-alphaL)
	gen double `mkp' = ((1-s_final_`d')/`spreadsum')^(alphaK+alphaL)
	gen double tfpr2_ratio_cf_`d' = `lvl'*`mkp'
		drop `spread' `spreadsum' `lvl' `mkp'
	
	
	}
	
/*Industry TFP and Counterfactual Industry TFP
	TFP2_*    uses the TFPR ratio                (tfpr_ratio_*)
	TFP3_cf_* uses the counterfactual TFPR ratio (tfpr2_ratio_cf_*)
	In both cases
		TFP = [ sum of (A*ratio/mean)^(sigma-1) ]^(1/(sigma-1)) * mean
	with A = tfpq and mean = industry-year mean of A*ratio. Each term of the
	sum is multiplied by wt for ASM and enters unweighted for CMF.
*/
foreach spec in "TFP2 tfpr_ratio" "TFP3_cf tfpr2_ratio_cf" {
	local out : word 1 of `spec'
	local ratio : word 2 of `spec'
	
	sort fk_naics02 year
	
	foreach d in ASM CMF {
		local w ""
		if "`d'"=="ASM" local w "wt*"
		
		tempvar adj adjmean adjrel term agg
		
		gen double `adj' = (tfpq_`d'*`ratio'_`d') if sample_`d'==1
		by fk year: egen double `adjmean' = mean(`adj')
		gen double `adjrel' = `adj'/`adjmean'
		gen double `term' = `w'`adjrel'^(sigma-1) if sample_`d'==1
		by fk_naics02 year: egen double `agg' = sum(`term')
		gen double `out'_`d' = `agg'^(1/(sigma-1))*`adjmean'
		drop `adj' `term' `agg' `adjmean' `adjrel'
		}
	}

/*Industry Misallocation: counterfactual industry TFP over actual industry TFP*/
foreach d in ASM CMF {
	gen double M3_i_`d' = TFP3_cf_`d'/TFP2_`d'
	}

/*Keeping and cleaning the third version of the calculation: M_i_* starts as
	a copy of M3_i_* and is blanked out below where the flag is set*/
foreach d in ASM CMF {
	gen double M_i_`d' = M3_i_`d'
	}

/*Flagging Industry Observations when RTS>markup: flag is 1 when
	(alphaL+alphaK)/(sigma/(sigma-1)) exceeds 0.995*/
gen flag = 1 if (alphaL+alphaK)/(sigma/(sigma-1))>0.995
foreach d in ASM CMF {
	replace M_i_`d' = . if flag==1
	}

/*Aggregate Misallocation, step 1: Industry Value-Added
	py_weighted_sum_* is the industry-year total of PY2 over sample
	observations that are not flagged; PY2 is multiplied by wt for ASM and
	enters as is for CMF.*/
local dtypes "ASM CMF"
foreach d of local dtypes {
	local rev "PY2"
	if "`d'"=="ASM" local rev "PY2*wt"
	
	gen py_weighted = `rev' if sample_`d'==1 & flag!=1
	by fk_naics02 year: egen py_weighted_sum_`d' = sum(py_weighted) if sample_`d'==1
	drop py_weighted
	}
/*Aggregate Misallocation, step 2: collapse to one row per industry-year
	(means of the listed variables) and combine the industry measures into a
	yearly aggregate,
		M_d = exp( sum over industries of sh_py_d * log(M_i_d) ),
	where sh_py_d is the industry's share of total value added in that year.*/
collapse (mean) M3_* M_* py_weighted* alphaL alphaK sigma flag, by(fk_naics02 year)

local dtypes "ASM CMF"
foreach d of local dtypes {
	/*Only industries that actually contribute a term to the sum may enter the
		share denominator; otherwise the weights add up to less than one and
		the aggregate is pulled towards 1. An industry contributes when both
		log(M_i) and its value added are non-missing.*/
	gen byte in_agg = !missing(log(M_i_`d'), py_weighted_sum_`d')
	
	bys year: egen py_total = total(py_weighted_sum_`d') if in_agg
	gen sh_py_`d' = py_weighted_sum_`d'/py_total
	drop py_total
	
	/*-missing- makes the total missing (not 0) when a year has no contributing
		industry, so that M_d is missing rather than exp(0)=1 in such years
		(e.g. CMF outside census years)*/
	gen temp = sh_py_`d'*log(M_i_`d')
	by year: egen temp2 = total(temp), missing
	gen M_`d'=exp(temp2)
	drop temp temp2 in_agg
	}
	
/*Tag every exported variable with the variable-markup label and the
	specification name, then save*/
keep year fk_naics02 M* alphaL alphaK sigma flag
	foreach var of varlist M* alphaL alphaK sigma flag {
		rename `var' `var'_VM_`fname'
		}

save "`data'Misallocation_Industry_VM_`fname'.dta", replace

}

/*End of File*/
